#Loading libraries

suppressMessages(library(haven))
suppressMessages(library(dplyr))
suppressMessages(library(psych))
suppressMessages(library(tidyr))
suppressMessages(library(ggplot2))
suppressMessages(library(lcmm))
suppressMessages(library(tidyLPA))

#Loading data

#Loading data

d_raw <- read_sav("Hall.Lewis.Ellsworth(2018)_LongitudinalClimateChangeBeliefsandBehavior_OSF.sav")

#Dataset w/ variables of interest

#Variables of interest: perceived importance of climate change at waves T1-T7.
#NOTE: the original select() had a trailing comma inside c(), which errors with
#"argument is empty"; all_of() also errors loudly if any column is missing.
d <- d_raw |>
  select(all_of(paste0("T", 1:7, "_important")))

#Getting the data w/out any NAs (complete cases across all seven waves)
d_nomiss <- d[complete.cases(d), ]

#Save the data 
#write.csv(d_nomiss, "d_nomiss.csv", row.names = FALSE)

#Quick check corrs and dist 
pairs.panels(d_nomiss)

#Descriptives for each wave, printed in order T1..T7.
#Recorded results: n = 292 throughout; means dip from 5.00 (T1) to 4.67 (T4)
#then recover to 4.92 (T7); SDs range 1.70-1.87; skew -0.48 to -0.74;
#median = 5 at every wave; observed range always 1-7.
for (wave_col in paste0("T", 1:7, "_important")) {
  print(describe(d_nomiss[[wave_col]]))
}

#Prepping the data

#Adding the subjects' id (row number serves as the participant identifier)
d_nomiss <- d_nomiss |>
  mutate(id = row_number())

#Reshape to long format: one row per participant x wave. Column names such as
#"T1_important" split at "_" into the wave label ("time") and the measure
#name, which becomes the value column ("important").
d_long <- d_nomiss |>
  pivot_longer(
    cols = -id,
    names_to = c("time", ".value"),
    names_sep = "_"
  )

str(d_long)
## Recorded structure: tibble [2,044 x 3] — id (int), time (chr "T1".."T7"),
## important (num)
#To save the dataset in the long format
#write.csv(d_long, "d_long.csv", row.names = FALSE)

#Important across time distribution

hist_imp <- ggplot(d_long, aes(x = important)) +
  geom_histogram(bins = 7, fill = "skyblue", color = "black") +  
  facet_wrap(~ time, scales = "free", ncol = 4) +  
  labs(title = "",
       x = "Importance CC Level",
       y = "Frequency") +
  theme_classic()

print(hist_imp)

#Spaghetti plot for Important

#Converting the Time variable into the factor
d_long$time <- factor(d_long$time, levels = c("T1", "T2", "T3", "T4", "T5", "T6", "T7"))

#Subset 100 random participants for better visibility of the plot.
#set.seed makes the subset reproducible across runs (consistent with the
#set.seed(2002) convention used for the model grid searches below).
set.seed(2002)
sampled_ids <- sample(unique(d_long$id), 100)

#Plotting the random subset 
ggplot(d_long  |>  filter(id %in% sampled_ids), aes(x = time, y = important, group = id, color = as.factor(id))) +
  geom_line(alpha = 0.4) +
  labs(title = "",
       x = "Time", y = "Importance Level", color = "ID") +
  theme_classic()

#Plotting the subset with the LOESS curve for the overall trend.
#Fixes vs the original: (1) geom_smooth gets group = 1 so a single overall
#LOESS is fitted; previously it inherited group = id and tried one loess per
#participant (7 points each), producing the recorded "Removed 590 rows"
#warning. (2) `size` -> `linewidth`, deprecated for lines since ggplot2 3.4.0.
ggplot(d_long  |>  filter(id %in% sampled_ids), aes(x = time, y = important, group = id, color = as.factor(id))) +
  geom_line(alpha = 0.4) +
  geom_smooth(aes(group = 1), method = "loess", se = FALSE, color = "grey23",
              span = 0.75, linewidth = .5) +
  labs(title = "Sample of Individual Trajectories for 'Important' Over Time",
       x = "Time", y = "Importance Level") +
  scale_y_continuous(limits = c(1, 7), breaks = c(1, 2, 3, 4, 5, 6, 7)) + 
  theme_minimal() +
  theme(legend.position = "none")

#Average Importance at each time point across participants 
average_importance <- d_long  |> 
  group_by(time)  |> 
  summarise(average_important = mean(important, na.rm = TRUE))

#Plotting the average trend (`size` -> `linewidth` for geom_line, deprecated
#since ggplot2 3.4.0; `size` remains correct for geom_point)
ggplot(average_importance, aes(x = time, y = average_important, group = 1)) +  #group = 1 to ensure it treats all data as one group
  geom_line(color = "blue", linewidth = 1.5) +
  geom_point(color = "red", size = 3) +   
  labs(
    title = "Average Importance Over Time",
    x = "Time",
    y = "Average Importance"
  ) +
  scale_y_continuous(limits = c(1, 7), breaks = c(1, 2, 3, 4, 5, 6, 7)) +  
  theme_minimal()

#Individual and Average overall trajectories
#(`size` -> `linewidth`: deprecated for line geoms since ggplot2 3.4.0)
ggplot(d_long |> filter(id %in% sampled_ids), aes(
  x = time, y = important, group = id, color = as.factor(id))) +
  geom_line(alpha = 0.4) +  
  geom_line(data = average_importance, aes(x = time, 
                                           y = average_important, group = 1), 
            color = "black", linewidth = 1.5) +  #overall average trend line
  labs(title = "",
       x = "Time point", y = "Importance Level") +
  theme_classic() +
  theme(legend.position = "none") 

#Trajectories for those in Low vs High group of Importance at T1

#Carry each participant's T1 importance score onto all of their rows
d_long <- d_long |>
  group_by(id) |>
  mutate(initial_important = important[time == "T1"]) |>
  ungroup()

#Median T1 importance is the cutoff for the split
cutoff <- median(d_long$initial_important, na.rm = TRUE)

#Participants at/above the median are labelled "Above", the rest "Below"
d_long <- d_long |>
  mutate(group_at_T1 = ifelse(initial_important >= cutoff, "Above", "Below"))

#Share of observations in each median-split group (every id contributes 7
#rows, so row-level percentages equal participant-level percentages)
t <- table(d_long$group_at_T1)
t_p <- prop.table(t) * 100
print(t_p)
## Recorded: Above 68.83562, Below 31.16438
#Plotting with group separation: individual lines plus one black LOESS per
#median-split group, facetted by group
ggplot(d_long, aes(x = time, y = important,
                   group = id, color = group_at_T1)) +
  geom_line(alpha = 0.4) +
  geom_smooth(aes(group = group_at_T1),
              method = "loess", se = FALSE, color = "black") +
  facet_wrap(~group_at_T1, scales = "free_y") +
  scale_y_continuous(limits = c(1, 7), breaks = 1:7) +
  labs(title = "",
       x = "Time Point", y = "Importance Level", color = "Median Group") +
  theme_classic() +
  theme(legend.position = "bottom")
## `geom_smooth()` using formula = 'y ~ x'

#Fitting a simple GMM step by step. Linear

#Fit a simple linear growth model: time is a 7-level factor, so both the
#fixed and random parts estimate an intercept plus 6 wave contrasts per
#subject. Single-class reference fit (ng defaults to 1); recorded summary
#below reports BIC = 5726.36 on 292 subjects / 2044 observations.
basic_model <- hlme(fixed = important ~ time, random = ~ time, subject = 'id', data = d_long)
summary(basic_model)
## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = important ~ time, random = ~time, subject = "id", 
##     data = d_long)
##  
## Statistical Model: 
##      Dataset: d_long 
##      Number of subjects: 292 
##      Number of observations: 2044 
##      Number of latent classes: 1 
##      Number of parameters: 36  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  13 
##      Convergence criteria: parameters= 8.5e-07 
##                          : likelihood= 1.9e-05 
##                          : second derivatives= 3.5e-11 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -2761  
##      AIC: 5594  
##      BIC: 5726.36  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the longitudinal model:
## 
##               coef      Se   Wald p-value
## intercept  5.00000 0.09926 50.373 0.00000
## timeT2    -0.08219 0.05691 -1.444 0.14868
## timeT3    -0.24315 0.06917 -3.515 0.00044
## timeT4    -0.32877 0.07123 -4.615 0.00000
## timeT5    -0.27740 0.07446 -3.725 0.00020
## timeT6    -0.19178 0.07514 -2.552 0.01071
## timeT7    -0.08219 0.07347 -1.119 0.26325
## 
## 
## Variance-covariance matrix of the random-effects:
##           intercept  timeT2  timeT3  timeT4  timeT5  timeT6  timeT7
## intercept   2.57959                                                
## timeT2     -0.11726 0.35105                                        
## timeT3     -0.14123 0.33358 0.80211                                
## timeT4     -0.27479 0.27859 0.74622 0.88671                        
## timeT5     -0.27822 0.22117 0.69707 0.83770 1.02401                
## timeT6     -0.22000 0.23848 0.68022 0.73571 0.90995 1.05390        
## timeT7     -0.29534 0.22352 0.66577 0.75120 0.89925 0.91314 0.98119
## 
##                              coef      Se
## Residual standard error:  0.54509 0.55575
#Latent class models (LCGA: class-specific fixed trajectories, no random
#effects). gridsearch() draws random start values, so seed the RNG first for
#reproducibility — matching the set.seed(2002) convention used elsewhere in
#this script.
set.seed(2002)
lcga1 <- hlme(important ~ time, subject = "id", ng = 1, data = d_long)
lcga2 <- gridsearch(rep = 100, maxiter = 10, minit = lcga1, 
                    m = hlme(important ~ time, subject = "id",
                             ng = 2, data = d_long, mixture = ~ time))
lcga3 <- gridsearch(rep = 100, maxiter = 10, minit = lcga1, m = hlme(
  important ~ time, subject = "id", ng = 3, data = d_long, mixture = ~ time))

#Compare the models with diff numbers of clusters (lower BIC = better fit)
summarytable(lcga1, lcga2, lcga3) 
##       G    loglik npm      BIC   %class1  %class2 %class3
## lcga1 1 -4097.333   8 8240.079 100.00000                 
## lcga2 2 -3235.341  16 6561.509  31.16438 68.83562        
## lcga3 3 -2989.113  24 6114.468  28.42466 47.60274 23.9726
#Detailed estimates for the 3-class LCGA (lowest BIC in the table above)
summary(lcga3)
## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = important ~ time, mixture = ~time, subject = "id", 
##     ng = 3, data = d_long)
##  
## Statistical Model: 
##      Dataset: d_long 
##      Number of subjects: 292 
##      Number of observations: 2044 
##      Number of latent classes: 3 
##      Number of parameters: 24  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  1 
##      Convergence criteria: parameters= 2.9e-13 
##                          : likelihood= 1.4e-11 
##                          : second derivatives= 1.3e-15 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -2989.11  
##      AIC: 6026.23  
##      BIC: 6114.47  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the class-membership model:
## (the class of reference is the last class) 
## 
##                      coef      Se   Wald p-value
## intercept class1  0.15621 0.17696  0.883 0.37737
## intercept class2  0.72535 0.17010  4.264 0.00002
## 
## Fixed effects in the longitudinal model:
## 
##                      coef      Se   Wald p-value
## intercept class1  4.58803 0.13915 32.971 0.00000
## intercept class2  6.24319 0.08339 74.865 0.00000
## intercept class3  2.91385 0.12056 24.170 0.00000
## timeT2 class1     0.12065 0.15118  0.798 0.42485
## timeT2 class2    -0.07128 0.10878 -0.655 0.51232
## timeT2 class3    -0.34186 0.15867 -2.155 0.03119
## timeT3 class1    -0.17737 0.15229 -1.165 0.24414
## timeT3 class2    -0.02540 0.10893 -0.233 0.81561
## timeT3 class3    -0.76980 0.16476 -4.672 0.00000
## timeT4 class1    -0.30389 0.15034 -2.021 0.04325
## timeT4 class2    -0.15869 0.10865 -1.461 0.14413
## timeT4 class3    -0.70914 0.15839 -4.477 0.00001
## timeT5 class1    -0.19358 0.15385 -1.258 0.20832
## timeT5 class2    -0.11146 0.10901 -1.022 0.30656
## timeT5 class3    -0.71812 0.15726 -4.567 0.00000
## timeT6 class1    -0.04687 0.15753 -0.298 0.76607
## timeT6 class2     0.01133 0.10790  0.105 0.91641
## timeT6 class3    -0.78070 0.15958 -4.892 0.00000
## timeT7 class1     0.06349 0.15721  0.404 0.68633
## timeT7 class2     0.06633 0.10939  0.606 0.54429
## timeT7 class3    -0.55926 0.15765 -3.548 0.00039
## 
##                              coef      Se
## Residual standard error:  0.90905 0.01443
#Fit GMM w/ only Random intercept for Importance.
#The fits below are slow, so they were run once, saved to .Rdata, and are
#reloaded further down; the commented code is kept as the record of how the
#loaded objects were produced.
set.seed(2002)
#gmm1 <- hlme(important ~ time, subject = "id", random = ~1, 
#ng = 1, data = d_long)
#gmm2 <- gridsearch(rep = 100, maxiter = 10, minit = gmm1, 
                  # hlme(important ~ time, subject = "id", random = ~1, 
#ng = 2, data = d_long, 
                      #  mixture = ~ time, nwg = TRUE)) 
#gmm3 <- gridsearch(rep = 100, maxiter = 10, minit = gmm1, 
                #   hlme(important ~ time, subject = "id", 
#random = ~1, ng = 3, data = d_long, 
                        # mixture = ~ time, nwg = TRUE)) 

#To save the outputs 
#gmm1, gmm2, gmm3
#save(gmm1, file = 'gmm1.Rdata')
#save(gmm2, file = 'gmm2.Rdata')
#save(gmm3, file = 'gmm3.Rdata')
#load() restores the objects under their saved names (gmm1, gmm2, gmm3);
#re-run the commented fits above if these files are missing
load('gmm1.Rdata')
load('gmm2.Rdata')
load('gmm3.Rdata')

#Compare the models with diff numbers of clusters
summarytable(gmm1, gmm2, gmm3) 
##      G    loglik npm      BIC   %class1   %class2  %class3
## gmm1 1 -2869.338   9 5789.767 100.00000                   
## gmm2 2 -2806.669  18 5715.519  68.49315 31.506849         
## gmm3 3 -2750.968  27 5655.208  68.83562  1.712329 29.45205
#Fit GMM w/ Random intercept and Slopes for Importance.
#As above: slow fits were run once, saved, and are reloaded below; the
#commented code documents how the loaded objects were produced.
set.seed(2002)
#gmm1_2 <- hlme(important ~ time, subject = "id", random = ~1 + time, ng = 1, data = d_long)
#gmm2_2 <- gridsearch(rep = 50, maxiter = 10, minit = gmm1_2, 
                  # hlme(important ~ time, subject = "id", random = ~1 + time, ng = 2, 
                      #  data = d_long, mixture = ~ time, nwg = TRUE)) 
#gmm3_2 <- gridsearch(rep = 50, maxiter = 10, minit = gmm1_2, 
                  # hlme(important ~ time, subject = "id", random = ~1 + time, ng = 3, 
                       # data = d_long, mixture = ~ time, nwg = TRUE)) 

#To save the outputs 
#save(gmm1_2, file = 'gmm1_2.Rdata')
#save(gmm2_2, file = 'gmm2_2.Rdata')
#save(gmm3_2, file = 'gmm3_2.Rdata')

#Restore the saved fits (gmm1_2, gmm2_2, gmm3_2)
load('gmm1_2.Rdata')
load('gmm2_2.Rdata')
load('gmm3_2.Rdata')

#Compare the models with diff numbers of clusters (recorded: gmm3_2 has the
#lowest BIC, 4902.96)
summarytable(gmm1_2, gmm2_2, gmm3_2) 
##        G    loglik npm      BIC   %class1  %class2  %class3
## gmm1_2 1 -2761.001  36 5726.365 100.00000                  
## gmm2_2 2 -2577.978  45 5411.411  42.12329 57.87671         
## gmm3_2 3 -2298.208  54 4902.960  14.72603 10.61644 74.65753
#Detailed estimates for the selected 3-class GMM with random intercept+slopes
summary(gmm3_2) 
## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = important ~ time, mixture = ~time, random = ~1 + 
##     time, subject = "id", ng = 3, nwg = TRUE, data = d_long)
##  
## Statistical Model: 
##      Dataset: d_long 
##      Number of subjects: 292 
##      Number of observations: 2044 
##      Number of latent classes: 3 
##      Number of parameters: 54  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  22 
##      Convergence criteria: parameters= 2.7e-06 
##                          : likelihood= 4e-06 
##                          : second derivatives= 2.4e-11 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -2298.21  
##      AIC: 4704.42  
##      BIC: 4902.96  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the class-membership model:
## (the class of reference is the last class) 
## 
##                      coef      Se    Wald p-value
## intercept class1 -1.36400 0.22930  -5.949 0.00000
## intercept class2 -1.90302 0.19563  -9.728 0.00000
## 
## Fixed effects in the longitudinal model:
## 
##                      coef      Se    Wald p-value
## intercept class1  4.41382 0.78535   5.620 0.00000
## intercept class2  6.74202 0.02998 224.896 0.00000
## intercept class3  4.89008 0.19009  25.725 0.00000
## timeT2 class1    -0.06770 0.24545  -0.276 0.78269
## timeT2 class2     0.00000 0.00820   0.000 0.99999
## timeT2 class3    -0.09815 0.05961  -1.647 0.09964
## timeT3 class1    -0.62914 0.28077  -2.241 0.02504
## timeT3 class2     0.00000 0.00913   0.000 0.99999
## timeT3 class3    -0.18074 0.06913  -2.614 0.00894
## timeT4 class1    -0.59280 0.27872  -2.127 0.03343
## timeT4 class2     0.00000 0.00912   0.000 0.99999
## timeT4 class3    -0.31029 0.06928  -4.479 0.00001
## timeT5 class1    -0.47067 0.29564  -1.592 0.11138
## timeT5 class2     0.00000 0.00941   0.000 0.99999
## timeT5 class3    -0.26935 0.07342  -3.668 0.00024
## timeT6 class1    -0.30469 0.29992  -1.016 0.30967
## timeT6 class2     0.00000 0.00974   0.000 0.99999
## timeT6 class3    -0.19151 0.07365  -2.600 0.00932
## timeT7 class1    -0.22251 0.28724  -0.775 0.43855
## timeT7 class2     0.00000 0.00934   0.000 0.99999
## timeT7 class3    -0.05858 0.07083  -0.827 0.40824
## 
## 
## Variance-covariance matrix of the random-effects:
##           intercept  timeT2  timeT3  timeT4  timeT5  timeT6 timeT7
## intercept   7.32417                                               
## timeT2     -0.24690 0.60375                                       
## timeT3     -0.23270 0.40827 0.81874                               
## timeT4     -0.28863 0.36215 0.58622 0.80842                       
## timeT5     -0.27353 0.32922 0.56029 0.59100 0.89436               
## timeT6     -0.27033 0.34512 0.54962 0.52689 0.65725 0.94277       
## timeT7     -0.27184 0.35428 0.54083 0.57175 0.64208 0.65025 0.8492
## 
##                                     coef      Se
## Proportional coefficient class1  2.08836 0.09973
## Proportional coefficient class2  0.06172 0.00589
## Residual standard error:         0.00000 0.00244

#Simple GMM. Non-linear

#Numeric wave index (1-7) extracted from the "T<k>" labels
d_long$time_numeric <- as.numeric(substring(as.character(d_long$time), 2))

#Single-class quadratic growth model: orthogonal linear + quadratic
#polynomials of time in both the fixed and random parts
nl_model <- hlme(fixed = important ~ poly(time_numeric, 2),
                 random = ~ poly(time_numeric, 2),
                 subject = 'id', data = d_long)
summary(nl_model)
## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = important ~ poly(time_numeric, 2), random = ~poly(time_numeric, 
##     2), subject = "id", data = d_long)
##  
## Statistical Model: 
##      Dataset: d_long 
##      Number of subjects: 292 
##      Number of observations: 2044 
##      Number of latent classes: 1 
##      Number of parameters: 10  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  15 
##      Convergence criteria: parameters= 7.3e-07 
##                          : likelihood= 2e-07 
##                          : second derivatives= 1.3e-14 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -2787.63  
##      AIC: 5595.27  
##      BIC: 5632.04  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the longitudinal model:
## 
##                 coef      Se    Wald p-value
## intercept    4.82779 0.09651  50.024 0.00000
## poly(...)1  -1.61466 1.07469  -1.502 0.13298
## poly(...)2   4.59728 0.79935   5.751 0.00000
## 
## 
## Variance-covariance matrix of the random-effects:
##            intercept poly(...)1 poly(...)2
## intercept    2.65700                      
## poly(...)1   5.13786  209.07143           
## poly(...)2  -3.76909  -67.54998   58.40069
## 
##                               coef      Se
## Residual standard error:   0.66255 0.01371
#Cubic polynomial growth model: adds an orthogonal cubic term to nl_model in
#both the fixed and random parts (recorded BIC 5641.66 vs 5632.04 quadratic)
cubic_model <- hlme(fixed = important ~ poly(time_numeric, 3), 
                    random = ~ poly(time_numeric, 3), 
                   subject = 'id', data = d_long)
summary(cubic_model) 
## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = important ~ poly(time_numeric, 3), random = ~poly(time_numeric, 
##     3), subject = "id", data = d_long)
##  
## Statistical Model: 
##      Dataset: d_long 
##      Number of subjects: 292 
##      Number of observations: 2044 
##      Number of latent classes: 1 
##      Number of parameters: 15  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  26 
##      Convergence criteria: parameters= 3.4e-06 
##                          : likelihood= 2.5e-06 
##                          : second derivatives= 5.5e-06 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -2778.25  
##      AIC: 5586.51  
##      BIC: 5641.66  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the longitudinal model:
## 
##                 coef      Se    Wald p-value
## intercept    4.82779 0.09651  50.024 0.00000
## poly(...)1  -1.61469 1.07592  -1.501 0.13342
## poly(...)2   4.59730 0.80067   5.742 0.00000
## poly(...)3   0.43002 0.70944   0.606 0.54442
## 
## 
## Variance-covariance matrix of the random-effects:
##            intercept poly(...)1 poly(...)2 poly(...)3
## intercept    2.66001                                 
## poly(...)1   5.13799  215.40198                      
## poly(...)2  -3.76904  -67.21056   65.15327           
## poly(...)3   0.49366  -33.88439  -17.82144    23.5836
## 
##                               coef      Se
## Residual standard error:   0.64648 0.01339
#Quartic polynomial growth model: fourth-order orthogonal polynomial in both
#fixed and random parts (recorded BIC 5657.45 — worse than quadratic/cubic)
quartic_model <- hlme(fixed = important ~ poly(time_numeric, 4), 
                      random = ~ poly(time_numeric, 4), 
                   subject = 'id', data = d_long)
summary(quartic_model) 
## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = important ~ poly(time_numeric, 4), random = ~poly(time_numeric, 
##     4), subject = "id", data = d_long)
##  
## Statistical Model: 
##      Dataset: d_long 
##      Number of subjects: 292 
##      Number of observations: 2044 
##      Number of latent classes: 1 
##      Number of parameters: 21  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  91 
##      Convergence criteria: parameters= 1.4e-06 
##                          : likelihood= 1.7e-05 
##                          : second derivatives= 8.7e-05 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -2769.12  
##      AIC: 5580.24  
##      BIC: 5657.45  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the longitudinal model:
## 
##                 coef      Se    Wald p-value
## intercept    4.82779 0.09651  50.024 0.00000
## poly(...)1  -1.61465 1.07474  -1.502 0.13300
## poly(...)2   4.59727 0.79962   5.749 0.00000
## poly(...)3   0.43008 0.70316   0.612 0.54078
## poly(...)4  -1.13176 0.69547  -1.627 0.10367
## 
## 
## Variance-covariance matrix of the random-effects:
##            intercept poly(...)1 poly(...)2 poly(...)3 poly(...)4
## intercept    2.66451                                            
## poly(...)1   5.14393  224.43951                                 
## poly(...)2  -3.77516  -67.53643   73.85687                      
## poly(...)3   0.49284  -34.37230  -18.65112   31.43926           
## poly(...)4   0.00078   25.37764  -25.98317   -4.83866    28.3847
## 
##                               coef      Se
## Residual standard error:   0.62163 0.01769
#To save outputs 
#save(nl_model, file = 'nl_model.Rdata')
#save(cubic_model, file = 'cubic_model.Rdata')
#save(quartic_model, file = 'quartic_model.Rdata')

#load('nl_model.Rdata')
#load('cubic_model.Rdata')
#load('quartic_model.Rdata')

#Compare models: per the recorded table, the quadratic (nl_model) has the
#lowest BIC; the extra cubic/quartic parameters raise the log-likelihood but
#not enough to offset the BIC penalty
summarytable(nl_model, cubic_model, quartic_model) 
##               G    loglik npm      BIC %class1
## nl_model      1 -2787.635  10 5632.037     100
## cubic_model   1 -2778.255  15 5641.661     100
## quartic_model 1 -2769.120  21 5657.452     100
#NOTE(review): this repeats the summary(quartic_model) call from above and
#prints identical output — presumably kept for the rendered report; confirm
#whether the duplicate is intentional
summary(quartic_model)
## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = important ~ poly(time_numeric, 4), random = ~poly(time_numeric, 
##     4), subject = "id", data = d_long)
##  
## Statistical Model: 
##      Dataset: d_long 
##      Number of subjects: 292 
##      Number of observations: 2044 
##      Number of latent classes: 1 
##      Number of parameters: 21  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  91 
##      Convergence criteria: parameters= 1.4e-06 
##                          : likelihood= 1.7e-05 
##                          : second derivatives= 8.7e-05 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -2769.12  
##      AIC: 5580.24  
##      BIC: 5657.45  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the longitudinal model:
## 
##                 coef      Se    Wald p-value
## intercept    4.82779 0.09651  50.024 0.00000
## poly(...)1  -1.61465 1.07474  -1.502 0.13300
## poly(...)2   4.59727 0.79962   5.749 0.00000
## poly(...)3   0.43008 0.70316   0.612 0.54078
## poly(...)4  -1.13176 0.69547  -1.627 0.10367
## 
## 
## Variance-covariance matrix of the random-effects:
##            intercept poly(...)1 poly(...)2 poly(...)3 poly(...)4
## intercept    2.66451                                            
## poly(...)1   5.14393  224.43951                                 
## poly(...)2  -3.77516  -67.53643   73.85687                      
## poly(...)3   0.49284  -34.37230  -18.65112   31.43926           
## poly(...)4   0.00078   25.37764  -25.98317   -4.83866    28.3847
## 
##                               coef      Se
## Residual standard error:   0.62163 0.01769
#Latent class nonlinear growth model with polynomial terms (cubic LCGA).
#Slow fits were run once, saved, and are reloaded below; the commented code
#documents how the loaded objects were produced.
set.seed(2002)
#nl_lcga1 <- hlme(fixed = important ~ poly(time_numeric, 3), subject = "id", ng = 1, data = d_long)
#nl_lcga2 <- gridsearch(rep = 50, maxiter = 10, minit = nl_lcga1, 
                    #   m = hlme(fixed = important ~ poly(time_numeric, 3),
                             #   subject = "id", ng = 2, data = d_long, 
                          #       mixture = ~ poly(time_numeric, 3)))
#nl_lcga3 <- gridsearch(rep = 50, maxiter = 10, minit = nl_lcga1, 
                    #   m = hlme(fixed = important ~ poly(time_numeric, 3),
                              #  subject = "id", ng = 3, data = d_long, 
                              #  mixture = ~ poly(time_numeric, 3)))

#To save outputs 
#save(nl_lcga1, file = 'nl_lcga1.Rdata')
#save(nl_lcga2, file = 'nl_lcga2.Rdata')
#save(nl_lcga3, file = 'nl_lcga3.Rdata')

#Restore the saved fits (nl_lcga1, nl_lcga2, nl_lcga3)
load('nl_lcga1.Rdata')
load('nl_lcga2.Rdata')
load('nl_lcga3.Rdata')

#Compare models (recorded: BIC drops monotonically through 3 classes)
summarytable(nl_lcga1, nl_lcga2, nl_lcga3)
##          G    loglik npm      BIC   %class1  %class2 %class3
## nl_lcga1 1 -4097.555   5 8223.493 100.00000                 
## nl_lcga2 2 -3236.616  10 6530.000  68.83562 31.16438        
## nl_lcga3 3 -2993.315  15 6071.781  28.42466 47.60274 23.9726
#Detailed estimates for the 3-class cubic LCGA
summary(nl_lcga3)
## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = important ~ poly(time_numeric, 3), mixture = ~poly(time_numeric, 
##     3), subject = "id", ng = 3, data = d_long)
##  
## Statistical Model: 
##      Dataset: d_long 
##      Number of subjects: 292 
##      Number of observations: 2044 
##      Number of latent classes: 3 
##      Number of parameters: 15  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  1 
##      Convergence criteria: parameters= 2.7e-06 
##                          : likelihood= 1.4e-06 
##                          : second derivatives= 2.3e-09 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -2993.31  
##      AIC: 6016.63  
##      BIC: 6071.78  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the class-membership model:
## (the class of reference is the last class) 
## 
##                      coef      Se    Wald p-value
## intercept class1  0.15115 0.17954   0.842 0.39988
## intercept class2  0.71649 0.18095   3.960 0.00008
## 
## Fixed effects in the longitudinal model:
## 
##                       coef      Se    Wald p-value
## intercept class1   4.52114 0.12733  35.507 0.00000
## intercept class2   6.20454 0.04876 127.251 0.00000
## intercept class3   2.36593 0.08121  29.135 0.00000
## poly(...)1 class1 -0.46427 2.16701  -0.214 0.83036
## poly(...)1 class2  0.87565 1.36822   0.640 0.52218
## poly(...)1 class3 -8.05093 1.93781  -4.155 0.00003
## poly(...)2 class1  4.98746 1.80793   2.759 0.00580
## poly(...)2 class2  2.54575 1.32793   1.917 0.05523
## poly(...)2 class3  8.34346 1.94792   4.283 0.00002
## poly(...)3 class1  1.62255 1.83602   0.884 0.37684
## poly(...)3 class2  0.50938 1.32228   0.385 0.70007
## poly(...)3 class3 -1.11959 1.93842  -0.578 0.56355
## 
##                              coef      Se
## Residual standard error:  0.91109 0.01450
#GMM with nonlinear random effects Cubic. Random intercept for Importance.
#Slow fits were run once, saved, and are reloaded below; the commented code
#documents how the loaded objects were produced.
set.seed(2002)
#nl_gmm1 <- hlme(important ~ poly(time_numeric, 3), subject = "id", random = ~1, ng = 1, data = d_long)
#nl_gmm2 <- gridsearch(rep = 50, maxiter = 10, minit = nl_gmm1, 
               #    hlme(important ~ poly(time_numeric, 3), subject = "id", random = ~1, 
                    #    ng = 2, data = d_long, 
                      #  mixture = ~ poly(time_numeric, 3), nwg = TRUE)) 
#nl_gmm3 <- gridsearch(rep = 50, maxiter = 10, minit = nl_gmm1, 
               #    hlme(important ~ poly(time_numeric, 3), subject = "id", 
                       # random = ~1, ng = 3, data = d_long, 
                    #    mixture = ~ poly(time_numeric, 3), nwg = TRUE)) 

#Save outputs 
#save(nl_gmm1, file = 'nl_gmm1.Rdata')
#save(nl_gmm2, file = 'nl_gmm2.Rdata')
#save(nl_gmm3, file = 'nl_gmm3.Rdata')

#Restore the saved fits (nl_gmm1, nl_gmm2, nl_gmm3)
load('nl_gmm1.Rdata')
load('nl_gmm2.Rdata')
load('nl_gmm3.Rdata')

#Compare the models with diff numbers of clusters (recorded: the 3-class fit
#edges out 2 classes on BIC but yields a tiny ~4.8% class)
summarytable(nl_gmm1, nl_gmm2, nl_gmm3) 
##         G    loglik npm      BIC   %class1   %class2  %class3
## nl_gmm1 1 -2778.255  15 5641.661 100.00000                   
## nl_gmm2 2 -2694.605  21 5508.423  55.47945 44.520548         
## nl_gmm3 3 -2676.508  27 5506.288  55.47945  4.794521 39.72603
#Fit GMM w/ Random intercept and Slopes for Importance 
set.seed(2002)
#nl_gmm1_2 <- hlme(important ~ poly(time_numeric, 3), subject = "id", random = ~1 + poly(time_numeric, 3), ng = 1, data = d_long)
#nl_gmm2_2 <- gridsearch(rep = 50, maxiter = 10, minit = nl_gmm1, 
                 #  hlme(important ~ poly(time_numeric, 3), subject = "id", 
                    #    random = ~1 + poly(time_numeric, 3), 
                    #    ng = 2, data = d_long, 
                    #    mixture = ~ poly(time_numeric, 3), nwg = TRUE)) 
#nl_gmm3_2 <- gridsearch(rep = 50, maxiter = 10, minit = nl_gmm1, 
                 #  hlme(important ~ poly(time_numeric, 3), subject = "id", 
                     #   random = ~1 + poly(time_numeric, 3), ng = 3, data = d_long, 
                     #   mixture = ~ poly(time_numeric, 3), nwg = TRUE)) 

#Quartic model for GMM random slope/random intercept 
#nl_gmm4_2 <- gridsearch(rep = 50, maxiter = 10, minit = quartic_model, 
               #  hlme(important ~ poly(time_numeric, 4), subject = "id", 
                   #   random = ~1 + poly(time_numeric, 4), ng = 3, 
                   #   data = d_long, 
                   #   mixture = ~ poly(time_numeric, 4), nwg = TRUE)) 

#nl_gmm4_2a <- gridsearch(rep = 50, maxiter = 10, minit = quartic_model, 
              #  hlme(important ~ poly(time_numeric, 4), subject = "id", 
               #   random = ~1 + poly(time_numeric, 4), ng = 2, 
                #  data = d_long, 
                #  mixture = ~ poly(time_numeric, 4), nwg = TRUE)) 

#To save outputs 
#save(nl_gmm1_2, file = 'nl_gmm1_2.Rdata')
#save(nl_gmm2_2, file = 'nl_gmm2_2.Rdata')
#save(nl_gmm3_2, file = 'nl_gmm3_2.Rdata')
#save(nl_gmm4_2, file = 'nl_gmm4_2.Rdata')
#save(nl_gmm4_2a, file = 'nl_gmm4_2a.Rdata')

load('nl_gmm1_2.Rdata')
load('nl_gmm2_2.Rdata')
load('nl_gmm3_2.Rdata')
load('nl_gmm4_2.Rdata')
load('nl_gmm4_2a.Rdata')

#Compare the models with diff numbers of clusters
# BIC favors the 3-class quartic model (nl_gmm4_2, BIC = 5487.67) over the
# 2-class quartic (5496.08) and the cubic variants.
summarytable(nl_gmm1_2, nl_gmm2_2, nl_gmm3_2, nl_gmm4_2, nl_gmm4_2a) 
##            G    loglik npm      BIC   %class1   %class2  %class3
## nl_gmm1_2  1 -2778.255  15 5641.661 100.00000                   
## nl_gmm2_2  2 -2694.605  21 5508.423  55.47945 44.520548         
## nl_gmm4_2a 2 -2668.567  28 5496.083  54.45205 45.547945         
## nl_gmm3_2  3 -2676.508  27 5506.288  55.47945  4.794521 39.72603
## nl_gmm4_2  3 -2644.492  35 5487.670  54.79452  5.136986 40.06849
# Full estimates for the best-by-BIC model: 3-class quartic GMM.
summary(nl_gmm4_2) 
## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = important ~ poly(time_numeric, 4), mixture = ~poly(time_numeric, 
##     4), random = ~1 + poly(time_numeric, 4), subject = "id", 
##     ng = 3, nwg = TRUE, data = d_long)
##  
## Statistical Model: 
##      Dataset: d_long 
##      Number of subjects: 292 
##      Number of observations: 2044 
##      Number of latent classes: 3 
##      Number of parameters: 35  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  1 
##      Convergence criteria: parameters= 5.7e-06 
##                          : likelihood= 1.9e-07 
##                          : second derivatives= 1e-08 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -2644.49  
##      AIC: 5358.98  
##      BIC: 5487.67  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the class-membership model:
## (the class of reference is the last class) 
## 
##                       coef       Se    Wald p-value
## intercept class1   0.19079  0.22496   0.848 0.39636
## intercept class2  -1.89618  0.39903  -4.752 0.00000
## 
## Fixed effects in the longitudinal model:
## 
##                        coef       Se    Wald p-value
## intercept class1    6.06240  0.08533  71.050 0.00000
## intercept class2    3.80799  0.88291   4.313 0.00002
## intercept class3    3.48675  0.21962  15.876 0.00000
## poly(...)1 class1   0.14202  1.17294   0.121 0.90362
## poly(...)1 class2   3.33939 10.22462   0.327 0.74397
## poly(...)1 class3  -4.48520  1.88940  -2.374 0.01760
## poly(...)2 class1   2.37450  1.01521   2.339 0.01934
## poly(...)2 class2  10.72373  7.34014   1.461 0.14402
## poly(...)2 class3   6.36761  1.42746   4.461 0.00001
## poly(...)3 class1   1.02557  0.94443   1.086 0.27752
## poly(...)3 class2   3.77645  6.49285   0.582 0.56081
## poly(...)3 class3  -0.79298  1.36100  -0.583 0.56013
## poly(...)4 class1   0.59568  0.97924   0.608 0.54298
## poly(...)4 class2  -2.77164  5.74204  -0.483 0.62931
## poly(...)4 class3  -2.97614  1.39955  -2.126 0.03346
## 
## 
## Variance-covariance matrix of the random-effects:
##            intercept poly(...)1 poly(...)2 poly(...)3 poly(...)4
## intercept    1.80174                                            
## poly(...)1   3.50138  200.75178                                 
## poly(...)2  -1.57054  -47.39359   85.32228                      
## poly(...)3  -0.26943  -27.80353  -17.66082   58.38695           
## poly(...)4  -3.59686    8.56736  -15.02654   -3.40329   50.79176
## 
##                                      coef       Se
## Proportional coefficient class1   0.43556  0.05540
## Proportional coefficient class2   2.76164  0.35362
## Residual standard error:          0.56763  0.01320
# Estimates for the 3-class cubic GMM (for comparison with the quartic fit).
summary(nl_gmm3_2) 
## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = important ~ poly(time_numeric, 3), mixture = ~poly(time_numeric, 
##     3), random = ~1 + poly(time_numeric, 3), subject = "id", 
##     ng = 3, nwg = TRUE, data = d_long)
##  
## Statistical Model: 
##      Dataset: d_long 
##      Number of subjects: 292 
##      Number of observations: 2044 
##      Number of latent classes: 3 
##      Number of parameters: 27  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  4 
##      Convergence criteria: parameters= 1.6e-06 
##                          : likelihood= 6.6e-09 
##                          : second derivatives= 5.4e-14 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -2676.51  
##      AIC: 5407.02  
##      BIC: 5506.29  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the class-membership model:
## (the class of reference is the last class) 
## 
##                       coef       Se    Wald p-value
## intercept class1   0.25073  0.23689   1.058 0.28986
## intercept class2  -1.93928  0.43506  -4.458 0.00001
## 
## Fixed effects in the longitudinal model:
## 
##                        coef       Se    Wald p-value
## intercept class1    6.04779  0.09016  67.078 0.00000
## intercept class2    3.82640  0.90442   4.231 0.00002
## intercept class3    3.40414  0.23447  14.518 0.00000
## poly(...)1 class1   0.24464  1.17743   0.208 0.83541
## poly(...)1 class2   4.32167 10.41132   0.415 0.67807
## poly(...)1 class3  -4.85749  2.05679  -2.362 0.01819
## poly(...)2 class1   2.69501  1.04042   2.590 0.00959
## poly(...)2 class2  11.41774  9.48654   1.204 0.22876
## poly(...)2 class3   6.06080  1.62947   3.719 0.00020
## poly(...)3 class1   0.55181  1.00672   0.548 0.58361
## poly(...)3 class2   6.19991  7.25660   0.854 0.39289
## poly(...)3 class3  -0.55619  1.45656  -0.382 0.70257
## 
## 
## Variance-covariance matrix of the random-effects:
##            intercept poly(...)1 poly(...)2 poly(...)3
## intercept    1.65369                                 
## poly(...)1   2.64439  184.25040                      
## poly(...)2  -1.91815  -49.59072   77.34515           
## poly(...)3   0.91305  -27.68254  -17.17365   48.83857
## 
##                                      coef       Se
## Proportional coefficient class1   0.45352  0.06566
## Proportional coefficient class2   2.82788  0.43949
## Residual standard error:          0.61158  0.01275
# Estimates for the 2-class quartic GMM.
summary(nl_gmm4_2a)
## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = important ~ poly(time_numeric, 4), mixture = ~poly(time_numeric, 
##     4), random = ~1 + poly(time_numeric, 4), subject = "id", 
##     ng = 2, nwg = TRUE, data = d_long)
##  
## Statistical Model: 
##      Dataset: d_long 
##      Number of subjects: 292 
##      Number of observations: 2044 
##      Number of latent classes: 2 
##      Number of parameters: 28  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  1 
##      Convergence criteria: parameters= 1e-07 
##                          : likelihood= 2.3e-08 
##                          : second derivatives= 2.7e-10 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -2668.57  
##      AIC: 5393.13  
##      BIC: 5496.08  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the class-membership model:
## (the class of reference is the last class) 
## 
##                        coef      Se     Wald p-value
## intercept class1    0.05299 0.16370    0.324 0.74617
## 
## Fixed effects in the longitudinal model:
## 
##                         coef      Se     Wald p-value
## intercept class1     6.05673 0.07170   84.479 0.00000
## intercept class2     3.53197 0.16521   21.378 0.00000
## poly(...)1 class1   -0.02020 1.21610   -0.017 0.98675
## poly(...)1 class2   -3.29596 1.97320   -1.670 0.09485
## poly(...)2 class1    2.36139 0.97825    2.414 0.01578
## poly(...)2 class2    6.95486 1.40516    4.950 0.00000
## poly(...)3 class1    0.99862 0.93882    1.064 0.28746
## poly(...)3 class2   -0.16953 1.26011   -0.135 0.89298
## poly(...)4 class1    0.48238 0.96017    0.502 0.61539
## poly(...)4 class2   -2.83377 1.26370   -2.242 0.02493
## 
## 
## Variance-covariance matrix of the random-effects:
##            intercept poly(...)1 poly(...)2 poly(...)3 poly(...)4
## intercept    2.35263                                            
## poly(...)1   6.06111  411.89928                                 
## poly(...)2  -2.58216 -102.76881  168.21534                      
## poly(...)3  -0.17124  -54.48308  -25.76125  106.66945           
## poly(...)4  -4.39797   35.85987  -34.51189  -11.62765   92.73458
## 
##                                       coef      Se
## Proportional coefficient class1    0.36364 0.03122
## Residual standard error:           0.56840 0.01351
# NOTE(review): duplicate call -- summary(nl_gmm4_2) was already printed above;
# kept here so the output appears next to the quartic_model summary that follows.
summary(nl_gmm4_2)
## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = important ~ poly(time_numeric, 4), mixture = ~poly(time_numeric, 
##     4), random = ~1 + poly(time_numeric, 4), subject = "id", 
##     ng = 3, nwg = TRUE, data = d_long)
##  
## Statistical Model: 
##      Dataset: d_long 
##      Number of subjects: 292 
##      Number of observations: 2044 
##      Number of latent classes: 3 
##      Number of parameters: 35  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  1 
##      Convergence criteria: parameters= 5.7e-06 
##                          : likelihood= 1.9e-07 
##                          : second derivatives= 1e-08 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -2644.49  
##      AIC: 5358.98  
##      BIC: 5487.67  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the class-membership model:
## (the class of reference is the last class) 
## 
##                       coef       Se    Wald p-value
## intercept class1   0.19079  0.22496   0.848 0.39636
## intercept class2  -1.89618  0.39903  -4.752 0.00000
## 
## Fixed effects in the longitudinal model:
## 
##                        coef       Se    Wald p-value
## intercept class1    6.06240  0.08533  71.050 0.00000
## intercept class2    3.80799  0.88291   4.313 0.00002
## intercept class3    3.48675  0.21962  15.876 0.00000
## poly(...)1 class1   0.14202  1.17294   0.121 0.90362
## poly(...)1 class2   3.33939 10.22462   0.327 0.74397
## poly(...)1 class3  -4.48520  1.88940  -2.374 0.01760
## poly(...)2 class1   2.37450  1.01521   2.339 0.01934
## poly(...)2 class2  10.72373  7.34014   1.461 0.14402
## poly(...)2 class3   6.36761  1.42746   4.461 0.00001
## poly(...)3 class1   1.02557  0.94443   1.086 0.27752
## poly(...)3 class2   3.77645  6.49285   0.582 0.56081
## poly(...)3 class3  -0.79298  1.36100  -0.583 0.56013
## poly(...)4 class1   0.59568  0.97924   0.608 0.54298
## poly(...)4 class2  -2.77164  5.74204  -0.483 0.62931
## poly(...)4 class3  -2.97614  1.39955  -2.126 0.03346
## 
## 
## Variance-covariance matrix of the random-effects:
##            intercept poly(...)1 poly(...)2 poly(...)3 poly(...)4
## intercept    1.80174                                            
## poly(...)1   3.50138  200.75178                                 
## poly(...)2  -1.57054  -47.39359   85.32228                      
## poly(...)3  -0.26943  -27.80353  -17.66082   58.38695           
## poly(...)4  -3.59686    8.56736  -15.02654   -3.40329   50.79176
## 
##                                      coef       Se
## Proportional coefficient class1   0.43556  0.05540
## Proportional coefficient class2   2.76164  0.35362
## Residual standard error:          0.56763  0.01320
# One-class quartic baseline (fitted earlier in the file) for reference.
summary(quartic_model)
## Heterogenous linear mixed model 
##      fitted by maximum likelihood method 
##  
## hlme(fixed = important ~ poly(time_numeric, 4), random = ~poly(time_numeric, 
##     4), subject = "id", data = d_long)
##  
## Statistical Model: 
##      Dataset: d_long 
##      Number of subjects: 292 
##      Number of observations: 2044 
##      Number of latent classes: 1 
##      Number of parameters: 21  
##  
## Iteration process: 
##      Convergence criteria satisfied 
##      Number of iterations:  91 
##      Convergence criteria: parameters= 1.4e-06 
##                          : likelihood= 1.7e-05 
##                          : second derivatives= 8.7e-05 
##  
## Goodness-of-fit statistics: 
##      maximum log-likelihood: -2769.12  
##      AIC: 5580.24  
##      BIC: 5657.45  
##  
##  
## Maximum Likelihood Estimates: 
##  
## Fixed effects in the longitudinal model:
## 
##                 coef      Se    Wald p-value
## intercept    4.82779 0.09651  50.024 0.00000
## poly(...)1  -1.61465 1.07474  -1.502 0.13300
## poly(...)2   4.59727 0.79962   5.749 0.00000
## poly(...)3   0.43008 0.70316   0.612 0.54078
## poly(...)4  -1.13176 0.69547  -1.627 0.10367
## 
## 
## Variance-covariance matrix of the random-effects:
##            intercept poly(...)1 poly(...)2 poly(...)3 poly(...)4
## intercept    2.66451                                            
## poly(...)1   5.14393  224.43951                                 
## poly(...)2  -3.77516  -67.53643   73.85687                      
## poly(...)3   0.49284  -34.37230  -18.65112   31.43926           
## poly(...)4   0.00078   25.37764  -25.98317   -4.83866    28.3847
## 
##                               coef      Se
## Residual standard error:   0.62163 0.01769

#Predictions vs Observations for 3 classes quartic

# Posterior classification quality for the 3-class quartic model.
# NOTE(review): this assignment shadows lcmm::postprob() with its returned
# table; if postprob() is needed again later, consider a name like pp_gmm4_2.
postprob <- postprob(nl_gmm4_2)
##  
## Posterior classification: 
##   class1 class2 class3
## N 160.00  15.00 117.00
## %  54.79   5.14  40.07
##  
## Posterior classification table: 
##      --> mean of posterior probabilities in each class 
##         prob1  prob2  prob3
## class1 0.8997 0.0043 0.0960
## class2 0.0022 0.8845 0.1133
## class3 0.0490 0.0395 0.9115
##  
## Posterior probabilities above a threshold (%): 
##          class1 class2 class3
## prob>0.7  89.38  86.67  89.74
## prob>0.8  80.00  66.67  86.32
## prob>0.9  67.50  66.67  73.50
## 

#Lo-Mendell-Rubin likelihood ratio test
# tidyLPA::calc_lrt(n, null loglik, null params, null classes,
#                   alt loglik, alt params, alt classes):
# 2-class quartic (nl_gmm4_2a) vs 3-class quartic (nl_gmm4_2).
# NOTE(review): n = 2044 is the number of observation rows; for a longitudinal
# mixture the number of subjects (292) may be the appropriate n -- confirm.
calc_lrt(2044,-2668.57, 28, 2, -2644.49, 35, 3)
## Lo-Mendell-Rubin ad-hoc adjusted likelihood ratio rest:
## 
## LR = 48.160, LMR LR (df = 7) = 46.142, p < 0.001

#For models fits
# Extended fit indices (AIC/BIC/SABIC/entropy/ICL) for the quartic models;
# nl_gmm4_2 (3 classes) has the best AIC/BIC/SABIC and higher entropy than
# the 2-class model.
summarytable(quartic_model, nl_gmm4_2, nl_gmm4_2a, which = c("G", "loglik", "conv", "npm", "AIC", "BIC", "SABIC", "entropy","ICL", "%class"))
##               G    loglik conv npm      AIC      BIC    SABIC   entropy
## quartic_model 1 -2769.120    1  21 5580.240 5657.452 5590.856 1.0000000
## nl_gmm4_2a    2 -2668.567    1  28 5393.134 5496.083 5407.288 0.7274587
## nl_gmm4_2     3 -2644.492    1  35 5358.984 5487.670 5376.677 0.7630735
##                   ICL1     ICL2   %class1   %class2  %class3
## quartic_model 5657.452 5657.452 100.00000                   
## nl_gmm4_2a    5551.245 5546.022  54.45205 45.547945         
## nl_gmm4_2     5563.675 5553.602  54.79452  5.136986 40.06849
# Side-by-side plot of BIC, entropy, and log-likelihood across the models.
summaryplot(quartic_model, nl_gmm4_2, nl_gmm4_2a, which = c("BIC", "entropy","loglik"))

#Plotting the best quartic GMM random slope/intercept. Importance. Predicted

#Plotting the model 
# Residual diagnostics for the 3-class quartic model.
plot(nl_gmm4_2, which = "residuals")

# Distribution of posterior class-membership probabilities.
plot(nl_gmm4_2, which = "postprob")

# Residual diagnostics for the 2-class quartic model.
plot(nl_gmm4_2a, which = "residuals")

#Time points in the study
time_points <- c("T1", "T2", "T3", "T4", "T5", "T6", "T7")

#Numeric values corresponding to time points for plotting
time_numeric <- 1:7

#Fixed-effect intercepts for classes 1-3 from the nl_gmm4_2 summary
intercepts <- c(6.06240, 3.80799, 3.48675)

#Fixed-effect polynomial coefficients from nl_gmm4_2
# (rows = poly degree 1-4, columns = class 1-3)
polynomials <- matrix(
  c(0.14202, 3.33939, -4.48520,  #Class 1, 2, 3 for poly1
    2.37450, 10.72373, 6.36761,   #Class 1, 2, 3 for poly2
    1.02557, 3.77645, -0.79298,   #Class 1, 2, 3 for poly3
    0.59568, -2.77164, -2.97614), #Class 1, 2, 3 for poly4
  ncol = 3, byrow = TRUE)

#Orthogonal polynomial basis over the 7 time points (7 x 4 matrix)
# NOTE(review): hlme built its poly() basis from the full d_long time vector,
# whose scaling differs from poly(1:7, 4); these curves show trajectory shape
# rather than exact fitted values -- confirm against predictY() if needed.
poly_values <- poly(time_numeric, 4)

#Compute predicted values for each class at each time point:
# intercept + basis %*% class coefficients.
# BUG FIX: the original used polynomials[class, ] (a ROW = one degree's values
# across classes, hence the sweep() "STATS does not recycle" warnings) and
# wrapped everything in sum(), collapsing all 7 time points into a single
# number that was then silently recycled across df_plot. The matrix product
# below yields one prediction per time point per class (7 x 3).
predicted_values <- sapply(1:3, function(class) {
  intercepts[class] + as.numeric(poly_values %*% polynomials[, class])
})

#Prepare data for plotting (7 time points x 3 classes, column-major order
# matches as.vector() on the 7 x 3 prediction matrix)
df_plot <- expand.grid(Time = time_points, Class = c("Class 1", "Class 2", "Class 3"))
df_plot$Important <- as.vector(predicted_values)
df_plot$Important <- pmin(pmax(df_plot$Important, 1), 7) #clamp to the 1-7 response scale

#Smoothed plot for the quartic function predictions
# (redundant re-clamp removed: df_plot$Important is already bounded to 1-7 above)

#Plotting the predicted values w/ a smoother
# NOTE(review): `size` for lines is deprecated in ggplot2 >= 3.4 in favor of
# `linewidth`; kept as-is since the file's ggplot2 version is unknown.
ggplot(df_plot, aes(x = Time, y = Important, color = Class, group = Class)) +
  geom_smooth(method = "lm", formula = y ~ poly(x, 4), se = FALSE, size = 1.2, aes(group = Class)) +  
  labs(title = "",
       x = "Time Point",
       y = "Predicted Importance Level") +
  scale_color_brewer(palette = "Set1") +
  scale_y_continuous("Predicted Importance Level", 
                     breaks = c(1, 2, 3, 4, 5, 6, 7), limits = c(1, 7)) +  
  theme_classic() +
  theme(legend.position = "right",
        plot.title = element_text(size = 16),
        axis.text = element_text(size = 12),
        axis.title = element_text(size = 14))

#Plotting quartic with 2 classes

#Time points in the study
time_points <- c("T1", "T2", "T3", "T4", "T5", "T6", "T7")

#Numeric values corresponding to time points for plotting
time_numeric <- 1:7

#Fixed-effect intercepts for classes 1-2 from the nl_gmm4_2a summary
intercepts <- c(6.05673, 3.53197)

#Fixed-effect polynomial coefficients from nl_gmm4_2a
# (rows = poly degree 1-4, columns = class 1-2)
polynomials <- matrix(
  c(-0.02020, -3.29596,  
    2.36139, 6.95486,    
    0.99862, -0.16953,   
    0.48238, -2.83377),  
  nrow = 4, byrow = TRUE, ncol = 2)

#Orthogonal polynomial basis over the 7 time points (7 x 4 matrix)
poly_values <- poly(time_numeric, 4)

#Compute predicted values for each class at each time point.
# BUG FIX: the original summed the entire weighted 7 x 4 matrix with
# sum(sweep(...)). Each orthogonal-poly column sums to ~0, so every class
# collapsed to its intercept (flat trajectories) and that single value was
# recycled over the 7 time points. The matrix product below yields one
# prediction per time point per class (7 x 2).
predicted_values <- sapply(1:2, function(class) {
  intercepts[class] + as.numeric(poly_values %*% polynomials[, class])
})

#Prepare data for plotting (7 time points x 2 classes, column-major order)
df_plot <- expand.grid(Time = time_points, Class = c("Class 1", "Class 2"))
df_plot$Important <- as.vector(predicted_values)

#Plotting the trajectories
# BUG FIX: each plot originally added scale_y_continuous() twice, producing
# "Scale for y is already present" warnings; only the second (fixed 1-7) scale
# was used, so the two are merged here, keeping the axis title.
ggplot(df_plot, aes(x = Time, y = Important, color = Class, group = Class)) +
  geom_line(size = 1.2) +
  geom_point(size = 3) +
  labs(title = "Predicted Trajectories of 'Important' by Class",
       x = "Time Point",
       y = "Predicted Importance Level") +
  scale_color_brewer(palette = "Set1") +
  scale_y_continuous("Predicted Importance Level",
                     limits = c(1, 7), breaks = c(1, 2, 3, 4, 5, 6, 7)) +
  theme_minimal()

#Smoothed version of the same trajectories (quartic smoother per class)
ggplot(df_plot, aes(x = Time, y = Important, color = Class, group = Class)) +
  geom_smooth(method = "lm", formula = y ~ poly(x, 4), se = FALSE, size = 1.2, aes(group = Class)) +  
  labs(title = "Predicted Trajectories of 'Important' by Class",
       x = "Time Point",
       y = "Predicted Importance Level") +
  scale_color_brewer(palette = "Set1") +
  scale_y_continuous("Predicted Importance Level",
                     limits = c(1, 7), breaks = c(1, 2, 3, 4, 5, 6, 7)) +
  theme_minimal()
## Scale for y is already present.
## Adding another scale for y, which will replace the existing scale.